#OS level tools
import os
import time
import itertools
from collections import defaultdict
from glob import glob
import psutil
from functools import partial
import re
#array and data structure
import numpy as np
import pandas as pd
import seaborn as sb
#Ipython display and widgets
import ipywidgets as widgets
from IPython.display import Image, HTML, display
from ipywidgets import interact_manual
#holoviews and plotting
import holoviews as hv
import datashader as ds
from holoviews.operation.datashader import aggregate, shade, datashade, dynspread
from holoviews.operation import decimate
#datashader
from datashader.colors import Greys9
import datashader as ds
from datashader import transfer_functions as tf
from datashader.utils import export_image
#dask parallelization
import dask.dataframe as dd
from dask import compute, delayed
import dask.threaded
import dask.multiprocessing
#color assignment
cmap_all=['white','white']
cmap_parent=['black','grey']
cmap_pop=(['darkgreen','lightgreen'], ['darkorange','yellow'], ['purple','blueviolet'], ['darkblue','lightblue'], ['indianred','red'], ['indianred','red'], ['indianred','red'], ['indianred','red'])
background = '#D3D3D3'
#export path assignment
temp_path='/scratch/'+os.environ['USER']+'/'+os.environ['SLURM_JOBID']
export_path="PNG"
export = partial(export_image, export_path=export_path, background=background)
hv.notebook_extension('bokeh')
display(HTML("<style>.container { width:100% !important; overflow-x: auto;white-space: nowrap;}</style>"))
hv.opts("RGB [toolbar=None, width=400, height=400, bgcolor='#D3D3D3', fontsize={'title':8, 'xlabel':8, 'ylabel':8, 'ticks':3}]")
box_layout = widgets.Layout(overflow_x='scroll',
border='3px solid black',
height='',
flex_flow='column',
display='flex')
row_layout = widgets.Layout(min_width='32000px')
hv.opts("RGB [toolbar=None, width=400, height=400, bgcolor='#D3D3D3', fontsize={'title':15, 'xlabel':10, 'ylabel':10, 'ticks':5}]")
def config_objects(s):
try:
with open(configLabel) as config_file:
config_file.seek(0)
gates={}
for line in config_file:
phenoType=""
line = line.strip()
gate = line.split("\t")
if len(gate)==12:
phenoType=gate[11]
gates.update({"pop"+str(gate[0]):[int(gate[0]), int(gate[1]), int(gate[2]), int(gate[3]), int(gate[4]), int(gate[5]), int(gate[6]), int(gate[7]), int(gate[8]), int(gate[9]), int(gate[10]), phenoType]})
return gates
except:
raise Exception("Error parsing configuration file")
def config_summary(s, h):
try:
with open(s) as config_file:
config_file.seek(0)
gates={}
for line in config_file:
phenoType=""
line = line.strip()
gate = line.split("\t")
xmarker=str(h[int(gate[1])-1])
ymarker=str(h[int(gate[2])-1])
startx=int((float(gate[3])/200)*4096)
starty=int((float(gate[5])/200)*4096)
endx=int((float(gate[4])/200)*4096)
endy=int((float(gate[6])/200)*4096)
parent="pop"+gate[7]
if len(gate)==12:
phenoType=gate[11]
gates.update({"pop"+str(gate[0]):[int(gate[0]), parent, xmarker, ymarker, phenoType, startx, endx, starty, endy]})
return gates
except:
raise Exception("Error parsing configuration file")
_nsre = re.compile('([0-9]+)')
def natural_sort_key(s):
return [int(text) if text.isdigit() else text.lower()
for text in re.split(_nsre, s)]
def natural_sort(l):
#https://stackoverflow.com/a/4836734/846892
convert = lambda text: int(text) if text.isdigit() else text.lower()
alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ]
return sorted(l, key = alphanum_key)
def label_color (pops, row):
eventcolor=0
for i, pop in enumerate(pops):
if row[pop]==0:
eventcolor=i+1
return eventcolor
def label_color2 (pops, row):
eventcolor="base"
for i, pop in enumerate(pops):
if row[pop]==0:
eventcolor=pop
return eventcolor
def parseCohort(s):
cohort_file=open(s)
return
def parseDataFrame(s):
result_file=open(s)
sampleLabel=os.path.splitext(s)[0]
events = sum(1 for line in result_file) -1 #quickly determine number of events
result_file.seek(0) #rewind file to beginning
header = result_file.readline()
header = header.strip()
headers = header.split("\t")
pop_offset=len(headers)
popList=[]
for i,header in enumerate(headers):
if header == "pop1":
pop_offset=i
if "pop" in header:
popList.append(header)
markers = headers[0:pop_offset]
result_file.seek(0) #rewind file to beginning
df = pd.read_csv(s, sep='\t')
dataIndex={}
for i,header in enumerate(headers):
dataIndex.update({header:i})
df['pop0']=0
return [sampleLabel,headers,markers,popList,df]
def parseDAFi(s):
df = pd.read_csv(s, sep='\t')
df['pop0']=0
return df
def html_row(file):
return '<img src="{}" style="display:inline;margin:1px" title="{}"/>'.format(export_path+"/"+file+".png",file,file)
#def html_row(file):
# return '<img src="{}?{}" style="display:inline;margin:1px" title="{}"/>'.format(export_path+"/"+file+".png",time.time(),file,file)
gatedFiles=sorted(glob('Gated/*/flock*.txt'))
gatedDelayed=[[(os.path.split(os.path.dirname(fn))[1]),delayed(parseDAFi)(fn)] for fn in gatedFiles]
sample_labels=[os.path.split(os.path.dirname(fn))[1] for fn in gatedFiles]
dfArray=compute(*gatedDelayed, get=dask.threaded.get)
headers=list(dfArray[0][1])
pop_offset=len(headers)
popList=[]
for i,header in enumerate(headers):
if header == "pop1":
pop_offset=i
if "pop" in header:
popList.append(header)
markers = headers[0:pop_offset]
%%output backend='bokeh'
%%opts Table.gates [width=1200]
%%opts Table.summary [width=1200]
configLabel="pipeline.config"
gates=config_objects(configLabel)
num_gates = len(gates)
summary=config_summary(configLabel, headers)
num_gates = len(summary)
gatesummary = [v for v in summary.values()]
summaryTable=hv.Table(gatesummary,kdims=['Population','Parent','XMarker','YMarker','phenotype','startx', 'endx', 'starty', 'endy'], group='summary', label='Summary')
sortedTable=summaryTable.sort('Population')
sortedTable
%%output backend='bokeh'
axis_popIndexDict = defaultdict(list)
popBounds={}
axises=[]
composite_axis=0
last_xmarker=""
last_ymarker=""
last_parent=0
gatesconfig=[]
for i in range(len(gates)):
pop="pop"+str(i+1)
config=gates.get(pop)
xmarker=str(headers[config[1]-1])
ymarker=str(headers[config[2]-1])
startx=int((float(config[3])/200)*4096)
starty=int((float(config[5])/200)*4096)
endx=int((float(config[4])/200)*4096)
endy=int((float(config[6])/200)*4096)
parent=int(config[7])
ctype=int(config[8])
popBounds.update({pop:[xmarker, ymarker, startx,starty,endx,endy,ctype,"pop"+str(parent)]})
key="axis"+str(composite_axis).zfill(2)
if (xmarker != last_xmarker) or (ymarker != last_ymarker) or (parent != last_parent):
composite_axis=composite_axis+1
key="axis"+str(composite_axis).zfill(2)
axises.append([xmarker,ymarker,key,"pop"+str(parent)])
axis_popIndexDict[key].append(pop)
last_xmarker=xmarker
last_ymarker=ymarker
last_parent=parent
gatesconfig.append([pop,xmarker,ymarker,parent])
num_axises = len(axises)
markerTable=hv.Table(markers,kdims=['Marker'])
axis_popTable=hv.Table(axis_popIndexDict, kdims=['Axis Index'], vdims=['sub populations'])
markerTable+axis_popTable.sort('Axis Index')
group_nocll=['3','66','7','78','76']
group_MRD=['13','23','33','44']
group_cll=['11','19','31','35','39','50','54','58','5','60','74']
sample_order=group_nocll+group_MRD+group_cll
hv.notebook_extension('matplotlib')
%%output backend='bokeh'
%%opts Table.gates [width=1200]
%%opts Table.summary [width=2000]
batchpercent_df = pd.read_csv('Gated/Batch_percentages.txt', sep='\t', index_col=0)
batchevents_df = pd.read_csv('Gated/Batch_events.txt', sep='\t', index_col=0)
hv.Layout(hv.Table(batchpercent_df,kdims=sample_labels, group='summary', label='Summary')+hv.Table(batchevents_df,kdims=sample_labels, group='summary', label='Summary')).cols(1)
%%output backend="bokeh"
%%opts Table [width=1000]
p_df=pd.DataFrame(batchpercent_df.unstack())
p_df.columns=['Percent']
e_df=pd.DataFrame(batchevents_df.unstack())
e_df.columns=['Events']
c_df=pd.concat([p_df,e_df],axis=1, join='outer').reset_index()
c_df.columns=['Sample','Population','Percent','Events']
hv.Table(c_df, label="Test")
%%output backend="bokeh" size=200
percentBoxPlot=hv.BoxWhisker(c_df, kdims=['Population'],vdims='Percent').relabel('Population Percent Box Plot')
eventsBoxPlot=hv.BoxWhisker(c_df, kdims=['Population'],vdims='Events').relabel('Population Events Box Plot')
percentBoxPlot+eventsBoxPlot
%%output backend="bokeh"
%%opts Scatter [width=1200 height=600 scaling_method='width' scaling_factor=0.1 size_index=2 show_grid=True tools=['hover']]
%%opts Scatter (color=Cycle('Category20') alpha=0.3 line_color='k')
%%opts NdOverlay [legend_position='bottom' show_frame=False]
cdf_table=hv.Table(c_df,kdims=['Sample','Population'],vdims=['Percent','Events'])
cdf_scatter = cdf_table.to.scatter('Population', ['Percent','Events'])
cdf_plot=cdf_scatter.overlay('Sample').relabel("Cross Sample Population Percent Comparison with Events Scaling")
(percentBoxPlot*cdf_plot).relabel("All-sample Population Percentage Boxplot with Relative Events Scaling")
#bdf_scatter.overlay('Sample')
%%output backend='matplotlib' size=400
hv.Bars(c_df,kdims=['Sample','Population'],vdims='Percent').relabel('Population Percent Bar Graph')+hv.Bars(c_df,kdims=['Sample','Population'],vdims='Events').relabel('Population Events Bar Graph')
#percentageMap=hv.HoloMap({(sample): hv.Bars(batchpercent_df[sample], 'population', 'percentage') for sample in sample_labels}, kdims=['Sample'])
#hv.NdLayout(percentageMap)
#eventMap=hv.HoloMap({(sample): hv.Bars(batchevents_df[sample], 'population', 'events') for sample in sample_labels}, kdims=['Sample'])
#hv.NdLayout(eventMap)
centroidFiles=sorted(glob('Gated/*/cent*.txt'))
reclusterset=set(["pop"+os.path.basename(fn).split(".")[0].split("centroids")[1]for fn in centroidFiles])
reclustermap={}
currentcluster="pop0"
for j, gate in enumerate(gatesconfig):
parent="pop"+str(gate[3])
if parent in reclusterset:
currentcluster=parent
reclustermap.update({gate[0]: currentcluster})
centDict=dict(((os.path.split(os.path.dirname(fn))[1]+"_pop"+os.path.basename(fn).split(".")[0].split("centroids")[1], parseDAFi(fn)) for fn in centroidFiles))
#centDict=compute(*centDelayed, get=dask.threaded.get)
centArray=[[sample, gate[0], centDict.has_key(sample+"_"+reclustermap.get(gate[0]))] for k, sample in enumerate(sample_labels) for j, gate in enumerate(gatesconfig)]
centroiddfPlots = hv.HoloMap({(sample, gate[0]): hv.Points(centDict.get(sample+"_"+reclustermap.get(gate[0])), kdims=[gate[1], gate[2]], group="cent")
for k, sample in enumerate(sample_labels) for j, gate in enumerate(gatesconfig)}, kdims=['Sample', 'Plot'])
poplist=natural_sort(popBounds.keys())
hv.opts("RGB [width=600, height=600, bgcolor='#D3D3D3', fontsize={'title':15, 'xlabel':10, 'ylabel':10, 'ticks':10}]")
hv.opts("Points.cent (color='purple' marker='+' size=10)")
size=600
popdfPlots = hv.HoloMap({(sample, gate[0]): datashade(hv.Points(dfArray[k][1].loc[dfArray[k][1][gate[0]]==0], kdims=[gate[1], gate[2]]), width=size, height=size, x_range=(0,4096), y_range=(0,4096), dynamic=False, link_inputs=False, cmap=cmap_pop[j])
for k, sample in enumerate(sample_labels) for j, gate in enumerate(gatesconfig)}, kdims=['Sample', 'Plot'])
alldfPlots = hv.HoloMap({(sample, gate[0]): datashade(hv.Points(dfArray[k][1], kdims=[gate[1], gate[2]]), width=size, height=size, x_range=(0,4096), y_range=(0,4096), dynamic=False, link_inputs=False, cmap=cmap_all)
for k, sample in enumerate(sample_labels) for j, gate in enumerate(gatesconfig)}, kdims=['Sample', 'Plot'])
parentdfPlots = hv.HoloMap({(sample, gate[0]): datashade(hv.Points(dfArray[k][1].loc[(dfArray[k][1]["pop"+str(gate[3])]==0) & (dfArray[k][1][gate[0]]==1)], kdims=[gate[1], gate[2]]), width=size, height=size, x_range=(0,4096), y_range=(0,4096), dynamic=False, link_inputs=False, cmap=cmap_parent)
for k, sample in enumerate(sample_labels) for j, gate in enumerate(gatesconfig)}, kdims=['Sample', 'Plot'])
boundarydfPlots = hv.HoloMap({(sample, gate[0]): hv.Bounds((popBounds.get(gate[0])[2], popBounds.get(gate[0])[3], popBounds.get(gate[0])[4], popBounds.get(gate[0])[5])).opts(style=dict(line_color=cmap_pop[j][0],color=cmap_pop[j][0]))
for k, sample in enumerate(sample_labels) for j, gate in enumerate(gatesconfig)}, kdims=['Sample', 'Plot'])
centroiddfPlots = hv.HoloMap({(sample, gate[0]): hv.Points(centDict.get(sample+"_"+reclustermap.get(gate[0])), kdims=[gate[1], gate[2]], group="cent")
for k, sample in enumerate(sample_labels) for j, gate in enumerate(gatesconfig)}, kdims=['Sample', 'Plot'])
def outputSampleGates(sample):
filename=export_path+"/"+sample
hv.output(hv.NdLayout(combineddfPlots[sample,:]).cols(1), backend='matplotlib', size=200, fig='png', filename=filename)
return filename
def outputPopGates(pop):
filename=export_path+"/"+pop
length=len(combineddfPlots[:,pop])
hv.output(hv.NdLayout(combineddfPlots[:,pop]).cols(length), backend='matplotlib', size=200, fig='png', filename=filename)
return filename
#hv.opts("RGB [width=600, height=600, bgcolor='#D3D3D3', fontsize={'title':15, 'xlabel':10, 'ylabel':10, 'ticks':10}]")
#temp=hv.output(hv.NdLayout(combineddfPlots).cols(5), backend='matplotlib', size=200, fig='png', filename=export_path+"/sample_composite")
combineddfPlots=alldfPlots*parentdfPlots*popdfPlots*boundarydfPlots*centroiddfPlots
testOutput=[[sample, outputSampleGates(sample)] for sample in sample_labels]
#testOutput2=[[pop, outputPopGates(pop)] for pop in poplist]
html=''.join(html_row(sample) for sample in sample_order)
#widgets.HTML(value=html, layout=row_layout)
#images=[Image(filename = export_path+"/"+pop+".png") for pop in poplist]
#display(*images)
html="".join(html_row(sample) for sample in sample_labels)
display(HTML(html))
HTML('''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')